from urllib.request import urlopen

from pdfminer.pdfinterp import PDFResourceManager,process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO,open

'''
textPage = urlopen("http://www.pythonscraping.com/pages/warandpeace/chapter1-ru.txt")
print(textPage.read().decode('utf-8'))

'''
# Read a PDF document

def read_pdf(pdf_file):
    """Return the text that pdfminer extracts from *pdf_file*.

    Args:
        pdf_file: The object handed to ``process_pdf`` as the PDF input
            (presumably a binary file-like object -- confirm against the
            pdfminer version in use).

    Returns:
        Everything the ``TextConverter`` wrote into the in-memory buffer,
        as a single ``str``.

    Any exception raised by pdfminer propagates to the caller; the
    converter and the buffer are still closed in that case.
    """
    rsrcmgr = PDFResourceManager()
    # The ``with`` block guarantees the buffer is closed on every exit path.
    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdf_file)
        finally:
            # Close the converter even when processing fails part-way.
            device.close()
        # Read the buffer only after the device is closed, and before the
        # ``with`` block closes the buffer itself.
        return retstr.getvalue()

# Download the sample PDF and print the text extracted from it.  Using the
# response as a context manager closes the connection even if extraction or
# printing raises, instead of only on the success path.
with urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf") as pdf:
    pdf_content = read_pdf(pdf)
    print(pdf_content)